rm(list = ls())

# Load required packages
library(haven)
library(lmtest)
library(sandwich)
library(stargazer)
library(dplyr)

# Load dataset
cep_all <- read_dta("Data/cep_all.dta")

# Define dosage ranges and construct samples
dosage_ranges <- 1:5
samples <- lapply(dosage_ranges, function(i) {
  subset(cep_all, treatment_dosage_coup >= -i & treatment_dosage_coup <= i)
})
names(samples) <- paste0("sample", dosage_ranges)

# Clustered standard errors
cluster_se <- function(model, data, type = "HC1") {
  idx <- as.integer(rownames(model.frame(model)))
  vc  <- sandwich::vcovCL(model, cluster = data$cluster[idx], type = type)
  lmtest::coeftest(model, vcov. = vc)
}
# Run RD models and output LaTeX table
run_models <- function(samples, formula, dv_name, table_title, dep_caption, cov_label) {
  models <- lapply(samples, function(df) lm(formula, data = df))
  clustered <- Map(cluster_se, models, samples)
  n_obs <- sapply(models, nobs)
  r2 <- sapply(models, function(m) summary(m)$r.squared)
  mean_dv <- sapply(models, function(m) mean(m$model[[dv_name]], na.rm = TRUE))
  
  stargazer(clustered,
            type = "latex",
            title = table_title,
            keep = "^treat1$",
            column.labels = c("54 and 56", "53–57", "52–58", "51–59", "50–60"),
            covariate.labels = cov_label,
            dep.var.caption = dep_caption,
            dep.var.labels.include = FALSE,
            add.lines = list(
              c("Obs.", n_obs),
              c("$R^2$", round(r2, 3)),
              c("Mean DV", round(mean_dv, 3))
            ),
            align = TRUE)
}

# Table E1: Continuity Test on Gender
run_models(samples,
           formula = sexo_2 ~ treat1 + treatment_dosage_coup + treatment_dosage_coup:treat1 + as.factor(encuesta),
           dv_name = "sexo_2",
           table_title = "Table E1: Continuity Test on Gender",
           dep_caption = "Dependent Variable: Gender (1 = Female)",
           cov_label = "Coup")

print('Table E1 complete')

